* This program runs a wild cluster bootstrap t procedure for it panel settings
* with few cross-sectional units and many time periods. It uses areg to absorb
* the period dummies and includes cluster dummies in the covariates. It clusters
* at the cluster dummy level and adjusts SEs for the correct dof adjustment.
* It then provides analytic p-values based on the t(G-1) distribution. And it
* then provides bootstrap p-values using the wild cluster bootstrap t procedure
* with the null hypothesis imposed, as recommended by Cameron, Gelbach and Miller.
*
* The syntax is:
*     wild_areg depvar var1 var2 [if] [weight],i(clustvar) t(timevar) control(controlvars) bootstraps(#)
* where we want coefficients, SEs, and p-values (against null hypotheses of 0) for
* var1 and var2. Controlvars includes covariates that have no null hypothesis, and 
* for which we don't care about the parameter estimates.
*
* Note that you need to have the program "unique" installed.

#delimit ;

cap prog drop wild_areg ;
prog def wild_areg ;

   syntax varlist [if] [in] [aweight fweight pweight /],Ivar(varname) Tvar(varname) Bootstraps(integer) [Control(varlist)]; /*define syntax*/
   
   preserve;

   local lvarlist = wordcount("`varlist'"); /*length of varlist*/
   local numhyp = `lvarlist'-1; /*number of variables with hypotheses we wish to test*/
   
   tokenize `varlist';
   
   tempvar touse; /*mark sample*/
   mark `touse' `if' `in';
   markout `touse' `*' `ivar' `tvar' `control';
   
	if "`weight'"~="" {; /* deal with weights */
		local weight "[`weight'=`exp']";
	};
	else {;
		local weight "";
	};
   
   qui areg `varlist' `control' i.`ivar' if `touse' `weight',absorb(`tvar') cluster(`ivar'); /*main regression*/
   qui unique `ivar'; /*count clusters*/
   local numclusters = r(sum); /*number of clusters*/
   local numcontrols = wordcount("`control'"); /*number of controls*/
   local numcovars = `numhyp'+`numcontrols'; /*number of covariates, without cluster dummies*/
   local se_adjustment = sqrt((e(N)-`numcovars'-`numclusters'+1)/(e(N)-`numcovars'+1)); /*adjustment for SEs in xtreg, instead of areg*/ 
   forvalues i=1/`numhyp' {; /*save main regression results for variables with hypotheses*/
     local j = `i'+1;
	 local b_main`i' = _b[``j''];
	 local se_main`i' = `se_adjustment'*_se[``j''];
     local t_main`i' = `b_main`i''/`se_main`i'';
	 local p_main`i' = 2*ttail(`numclusters'-1,abs(`t_main`i''));
   };  
   
   local t_vars = " ";
   forvalues i=1/`numhyp' {;
     local t_vars = "`t_vars'" + "t_wild`i' ";
	 local restricted`i' " "; /*this list of covariates leaves out a variable, thus imposing the null hypothesis*/
	 forvalues j=2/`lvarlist' {;
	   if `j'!=`i'+1 {;
	     local restricted`i' = "`restricted`i''" + "``j'' ";
	   };
	 };
	 qui areg `1' `restricted`i'' `control' i.`ivar' if `touse' `weight',absorb(`tvar') cluster(`ivar'); /*runs restricted regression with null hyp imposed*/
	 qui predict epshat`i',resid; /*residual under null hypothesis i*/
	 qui predict yhat`i',xbd; /*predicted value under null hypothesis i*/
   };
   	 
   sort `ivar';
   tempfile main bootsave;
   qui save `main',replace;
   cap erase `bootsave';
   qui postfile bskeep `t_vars' using `bootsave',replace; /*saves results for null hypothesis i in each bootstrap iteration*/
 
   forvalues b = 1/`bootstraps' { ;
     use `main', replace ;
	 local bst_wild_list " ";
     qui by `ivar': gen temp = uniform();
     qui by `ivar': gen pos = (temp[1] < .5) ;
	 forvalues i=1/`numhyp' {;
	   local j = `i'+1;
	   qui gen wildresid`i' = epshat`i' * (2*pos - 1); /*bootstrap residual with Rademacher weights*/
       qui gen wildy`i' = yhat`i' + wildresid`i';
       qui areg wildy`i' ``j'' `restricted`i'' `control' i.`ivar' if `touse' `weight',absorb(`tvar') cluster(`ivar'); /*bootstrap regression*/
       local bst_wild`i' = _b[``j'']/_se[``j''];
	   local bst_wild_list = "`bst_wild_list'" + "(`bst_wild`i'') ";
     };
     post bskeep `bst_wild_list';
   };
   qui postclose bskeep;

   qui drop _all;
   qui set obs 1; /*add observation with actual t statistic from main regression, so we can look at its percentile in the bootstrap dist*/
   forvalues i=1/`numhyp' {;
     gen t_wild`i' = `t_main`i'';
   }; 
   qui append using `bootsave';
   
   di " ";
   qui gen n = . ;
   forvalues i=1/`numhyp' {; /*loop to report results*/
     local j = `i'+1;
     qui summ t_wild`i';
     local bign = r(N);
     sort t_wild`i';
     qui replace n = _n;
     qui sum n if abs(t_wild`i' - `t_main`i'') < .000001;
     local myp = r(mean) / `bign';
     local p_wild`i' = 2 * min(`myp',(1-`myp'));
	 di in green "``j''";
	 di in yellow "  Coefficient:       " %7.5f `b_main`i'';
	 di in yellow "  Standard Error:    " %7.5f `se_main`i'';
	 di in yellow "  Analytic p-value:  " %7.3f `p_main`i'';
	 di in yellow "  Bootstrap p-value: " %7.3f `p_wild`i'';
   };   
   
   restore;
   
end;
